{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from advisor_backend.interface import Interface\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 算子调优分析\n",
    "## 1. 算子分析的数据准备\n",
    "当前算子分析工具支持分析Ascend PyTorch Profiler方式生成的ascend_pt目录以及ascend_pt/ASCEND_PROFILER_OUTPUT/kernel_details.csv文件。\n",
    "## 2. 算子分析解决的问题\n",
    "当前支持分析模型中存在可融合的小算子，并给出优化建议。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[INFO] Start to analyse the target file: [YOUR PATH]\n",
      "        pattern_name                                            pattern  len  \\\n",
      "0   bias_dropout_add                          (Add, DropOutDoMask, Add)    3   \n",
	  "7       AddLayerNorm                                 (Add, LayerNormV3)    2   \n",
	  "10                FA  (BatchMatMul, RealDiv, Add, Maximum, SoftmaxV2...    0   \n",
	  "16               FFN         (MatMulV2, Swish, MatMulV2, Mul, MatMulV2)    0   \n",
	  "15         RotaryMul        (Mul, Slice, Neg, Slice, ConcatD, Mul, Add)    0   \n",
	  "14         RotaryMul  (Mul, AsStrided, Neg, AsStrided, ConcatD, Mul,...    0   \n",
	  "13         RotaryMul  (Mul, Slice, Neg, Slice, ConcatD, Cast, Mul, Add)    0   \n",
	  "12                FA  (BatchMatMulV2, RealDiv, Add, Cast, SoftmaxV2,...    0   \n",
	  "11                FA  (BatchMatMulV2, RealDiv, Add, Cast, Maximum, C...    0   \n",
	  "9            RMSNorm  (Cast, Square, MemSet, ReduceMean, Add, Rsqrt,...    0   \n",
      "1                 FA  (BatchMatMul, Mul, Cast, Mul, MaskedFill, Soft...    0   \n",
	  "8            GeluAdd                                        (Gelu, Add)    0   \n",
	  "6       AddLayerNorm                                   (Add, LayerNorm)    0   \n",
	  "5          LayerNorm                            (Cast, LayerNorm, Cast)    0   \n",
	  "4            RMSNORM  (Cast, Square, ReduceMeanD, Add, Rsqrt, Cast, ...    0   \n",
	  "3                 FA  (Transpose, BatchMatMulV2, Transpose, Transpos...    0   \n",
      "2                 FA  (Transpose, Transpose, Transpose, Mul, Transpo...    0   \n",
      "17     GatherElement   (Transpose, Transpose, GatherElement, Transpose)    0   \n",
      "\n",
      "    count  duration sum(us)          op durations(us)              index  \n",
      "0       4           2178.16  [839.64, 464.04, 874.48]   [52, 64, 87, 99]  \n",
	  "7       4           2154.98          [874.48, 1280.5]  [54, 66, 89, 101]  \n",
	  "10      0              0.00                         0                  0  \n",
	  "16      0              0.00                         0                  0  \n",
	  "15      0              0.00                         0                  0  \n",
	  "14      0              0.00                         0                  0  \n",
	  "13      0              0.00                         0                  0  \n",
	  "12      0              0.00                         0                  0  \n",
	  "11      0              0.00                         0                  0  \n",
	  "9       0              0.00                         0                  0  \n",
      "1       0              0.00                         0                  0  \n",
	  "8       0              0.00                         0                  0  \n",
	  "6       0              0.00                         0                  0  \n",
	  "5       0              0.00                         0                  0  \n",
	  "4       0              0.00                         0                  0  \n",
	  "3       0              0.00                         0                  0  \n",
      "2       0              0.00                         0                  0  \n",
      "17      0              0.00                         0                  0  \n",
      "The computing time of fusable op is 4333.14 ms.\n",
      "Advice:\n",
      "Replace [Add, DropOutDoMask, Add] with bias_dropout_add.\n",
      "Replace [Add, LayerNormV3] with AddLayerNorm.\n"
     ]
    }
   ],
   "source": [
    "# Set this to the ascend_pt directory (or its kernel_details.csv) you want to analyse.\n",
    "compute_path = \"[YOUR PATH]\"\n",
    "interface = Interface(compute_path)\n",
    "data = interface.get_data('compute', 'npu_fused')\n",
    "\n",
    "# Print the fusable-pattern table, the bottleneck summary and the advice, in that order.\n",
    "for section in ('data', 'bottleneck', 'advice'):\n",
    "    print(data[section])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "qkd",
   "language": "python",
   "name": "qkd"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
